# Purpose:
# Replicate the main permutation-based chi-square analysis testing the association
# between merged disfluency type-location and cognitive-process category, while
# stratifying by participant.

library(readr)
library(dplyr)
library(coin)

# Configuration ----
# `input_file` is a placeholder path and must be pointed at the real CSV
# before running; `output_file` is where the printed test result is written.
input_file <- "data/change to data file name"
output_file <- "outputs/02_main_permutation_test.txt"

# Columns the analysis depends on: the stratification variable plus the two
# variables whose association is tested.
# NOTE(review): presumably `disf_loc` is the merged disfluency type-location
# and `tc` the cognitive-process category -- confirm against the codebook.
required_cols <- c("participant_id", "tc", "disf_loc")

# Load and validate ----
df <- read_csv(input_file, show_col_types = FALSE)

# Stop with the full list of absent columns rather than failing later inside
# the test call.
missing_cols <- required_cols[!required_cols %in% names(df)]
if (length(missing_cols) > 0) {
  stop("Missing required columns: ", paste(missing_cols, collapse = ", "))
}

# Convert every required column to a factor in a single step.
df <- df %>%
  mutate(across(all_of(required_cols), as.factor))

# Permutation test ----
# Seed the RNG so the resampled reference distribution (and therefore the
# reported p-value) is reproducible across runs.
set.seed(123)

# Approximate the null distribution by Monte Carlo resampling with
# 10000 resamples.
null_distribution <- approximate(nresample = 10000)

# Quadratic-form test of independence between `disf_loc` and `tc`, with
# `participant_id` supplied as the blocking (stratification) factor.
perm_test <- independence_test(
  disf_loc ~ tc | participant_id,
  data = df,
  distribution = null_distribution,
  teststat = "quad"
)

# Write report ----
# Make sure the destination directory exists; otherwise opening
# `output_file` for writing fails on a fresh checkout.
output_dir <- dirname(output_file)
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# capture.output() diverts output to the file and restores the previous
# diversion on exit, even if printing the result raises an error. A bare
# sink()/sink() pair would leave console output redirected in that case.
capture.output(
  cat("Main permutation-based chi-square test\n\n"),
  print(perm_test),
  file = output_file
)
